from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
#from selenium.webdriver.support.ui import Select
import time,sys
from lxml import etree

def _parse_number(text):
    """Return *text* as an int or float, or None if it is not a numeric literal.

    Safe replacement for ``eval`` on scraped cell text: only plain Python
    literals are accepted, so page content can never execute code.
    """
    import ast

    if text is None:
        return None
    try:
        value = ast.literal_eval(text.strip())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Placeholders such as '--' or empty cells land here.
        return None
    # literal_eval also accepts strings, tuples, booleans, ...; keep numbers only.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    return value


def parse_html(html):
    """Convert the fund table HTML into a list of Markdown lines.

    The first returned line is ``'# <text of //thead/tr[1]/th[5]>\\n'``.
    Each following line describes one ``//tbody/tr`` row, built from the
    link in column 3 (text and href), the link text in column 4 and the
    numeric values of columns 5 and 6 (presumably unit net value and
    accumulated net value -- confirm against the page).

    Rows that lack one of those cells, or whose columns 5/6 are not plain
    numbers, are skipped.

    Args:
        html: HTML markup containing the table's ``<thead>`` and ``<tbody>``.

    Returns:
        list of str, each terminated by a newline.

    Raises:
        ValueError: if the markup cannot be parsed or the date header cell
            is missing or empty.
    """
    dom = etree.HTML(html)
    if dom is None:
        raise ValueError('no parsable HTML in input')

    date_cells = dom.xpath('//thead/tr[1]/th[5]')
    if not date_cells or date_cells[0].text is None:
        raise ValueError('date header //thead/tr[1]/th[5] not found in table HTML')
    res = ['# ' + date_cells[0].text + '\n']

    funds = dom.xpath('//tbody/tr')
    print('total:', len(funds))
    for row in funds:
        code_link = row.find('td[3]/a')
        name_link = row.find('td[4]/a')
        jz_cell = row.find('td[5]')
        ljjz_cell = row.find('td[6]')
        if code_link is None or name_link is None or jz_cell is None or ljjz_cell is None:
            # Not a regular data row; skip it rather than abort the whole parse.
            continue

        jz = _parse_number(jz_cell.text)
        ljjz = _parse_number(ljjz_cell.text)
        if jz is None or ljjz is None:
            continue

        # NOTE(review): the href is appended to an eastmoney.com base URL even
        # though the table is scraped from hexun.com -- confirm this is intended.
        res.append('- %s %s http://fund.eastmoney.com/%s %s %s\n' % (
            code_link.text, name_link.text, code_link.get('href'), jz, ljjz))
    return res

def save_data( html ):
    """Parse *html* with ``parse_html`` and write the lines to ``output/hexun.md``.

    The file is overwritten on every call. The ``output`` directory must
    already exist. Prints the number of lines produced (header included).

    Args:
        html: table markup to hand to ``parse_html``.
    """
    ret = parse_html(html)
    print('used:',len(ret))
    # The context manager closes the file even if a write fails; the explicit
    # encoding keeps non-ASCII text from depending on the system locale.
    with open('output/hexun.md', 'w', encoding='utf-8') as fp:
        fp.writelines(ret)

def test(filename):
    """Run ``save_data`` on HTML read from a local file instead of the browser.

    Args:
        filename: path of a saved HTML file, read with the default text
            encoding.
    """
    # Read in one call; the context manager closes the file even on error.
    with open(filename, 'r') as fp:
        data = fp.read()
    save_data( data )
    
#test('hexun.htm')
#sys.exit(0)

from pyvirtualdisplay import Display
# Start a virtual display (visible=0) before launching Firefox.
display = Display(visible=0,size=(1024,768))
display.start()
#dcap = dict(DesiredCapabilities.PHANTOMJS)
#dcap['phantomjs.page.settings.userAgent'] = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36')
driver = None
try:
    try:
        url1='http://jingzhi.funds.hexun.com/jz'
        # Page notes:
        #   div ID 'filterTable'
        #   no pagination: 'getTbDataA()'
        #   page-change link: ID ajax_page
        #   date selector: enddate
        #driver = webdriver.Chrome(r'/usr/lib/chromium-browser/chromedriver')
        driver = webdriver.Firefox()
        driver.get( url1 )
        # Show the data from 3 days ago:
        #sel = driver.execute_script('document.getElementById("enddate").options[3].selected = true;')
        #element = driver.find_element_by_class_name('btnQuery')
        #element.send_keys(Keys.SPACE)
        time.sleep(2)
        #element  = WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.XPATH,'//input[@class="checkpage"]')))
        # find_element(By...) works on Selenium 3 and 4; the find_element_by_*
        # helpers were removed in Selenium 4.3.
        element = driver.find_element(By.XPATH, '//input[@class="checkpage"]')
        # Toggle the "checkpage" input with SPACE (presumably switches the
        # table to the unpaginated view -- confirm against the page).
        element.send_keys(Keys.SPACE)
        time.sleep(2)
        #element = WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.ID, "filterTable")))
        element = driver.find_element(By.ID, "filterTable")
        save_data( element.get_attribute('innerHTML') )
        print('data saved')
    finally:
        # Always close the browser, including when a step above raised.
        if driver is not None:
            driver.quit()
except Exception as e:
    print(e)
finally:
    # Stop the virtual display on every exit path.
    display.stop()
